home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_100
/
169_01
/
sq.c86
< prev
next >
Wrap
Text File
|
1984-07-27
|
22KB
|
778 lines
/*% cc -O -n % -o sq
*/
#include <stdio.h>
#define TRUE 1
#define rewind(c) fseek(c,0L,0); /* 1.7 */
#define FALSE 0
#define ERROR (-1)
#define PATHLEN 312 /* Number of characters allowed in pathname */
#define ALTNAME "sq.out"
/* Definitions and external declarations */
#define RECOGNIZE 0xFF76 /* unlikely pattern */
/* *** Stuff for first translation module *** */
#define DLE 0x90
/* *** Stuff for second translation module *** */
#define SPEOF 256 /* special endfile token */
#define NUMVALS 257 /* 256 data values plus SPEOF*/
/* Definitions and external declarations */
int Usestd; /* Use stdout for squeezed output */
unsigned crc; /* error check code */
/* *** Stuff for first translation module *** */
int likect; /*count of consecutive identical chars */
int lastchar, newchar;
char state;
/* states */
#define NOHIST 0 /*don't consider previous input*/
#define SENTCHAR 1 /*lastchar set, no lookahead yet */
#define SENDNEWC 2 /*newchar set, previous sequence done */
#define SENDCNT 3 /*newchar set, DLE sent, send count next */
/* *** Stuff for second translation module *** */
#define NOCHILD -1 /* indicates end of path through tree */
#define NUMNODES (NUMVALS + NUMVALS - 1) /* nbr of nodes */
#define MAXCOUNT 65535 /* biggest unsigned integer */
/* The following array of structures are the nodes of the
* binary trees. The first NUMVALS nodes becomethe leaves of the
* final tree and represent the values of the data bytes being
* encoded and the special endfile, SPEOF.
* The remaining nodes become the internal nodes of the final tree.
*/
struct nd {
unsigned weight; /* number of appearances */
int tdepth; /* length on longest path in tre*/
int lchild, rchild; /* indexes to next level */
}
node[NUMNODES];
int dctreehd; /*index to head node of final tree */
/* This is the encoding table:
* The bit strings have first bit in low bit.
* Note that counts were scaled so code fits unsigned integer
*/
int codelen[NUMVALS]; /* number of bits in code */
unsigned code[NUMVALS]; /* code itself, right adjusted */
unsigned tcode; /* temporary code value */
/* Variables used by encoding process */
int curin; /* Value currently being encoded */
int cbitsrem; /* Number of code string bits remaining */
unsigned ccode; /* Current code shifted so next code bit is at right */
/* This program compresses a file without losing information.
* The usq.com program is required to unsqueeze the file
* before it can be used.
*
* Typical compression rates are:
* .COM 6% (Don't bother)
* .ASM 33% (using full ASCII set)
* .DIC 46% (using only uppercase and a few others)
* Squeezing a really big file takes a few minutes.
*
* Useage:
* sq file ...
*
*
* The squeezed file name is formed by changing the second from last
* letter of the file type to Q. If there is no file type,
* the squeezed file type is QQQ. If the name exists it is
* overwritten!
*
* The transformations compress strings of identical bytes and
* then encode each resulting byte value and EOF as bit strings
* having lengths in inverse proportion to their frequency of
* occurrance in the intermediate input stream. The latter uses
* the Huffman algorithm. Decoding information is included in
* the squeezed file, so squeezing short files or files with
* uniformly distributed byte values will actually increase size.
*/
/* CHANGE HISTORY:
* 1.5u **nix version - means output to stdout.
* (stdin not allowed becuase sq needs to rewind input, which
* won't work with pipes.)
* Filename generation changed to suit **nix and stdio.
* 1.6u machine independent output for file compatibility with
* original CP/M version SQ, when running on machine with
* IBM byte ordering such as Z8000 and 68000.
*
* 1.7 08/13/83 C86 Version by Wayne Fruhwald
* Converted to work with Computer Innovation's C86 c compiler under MSDOS.
*
*/
#define VERSION "1.7 8-13-83" /* 1.7 */
main(argc, argv)
int argc;
char *argv[];
{
register i,c;
/* Process the parameters in order */
for(i = 1; i < argc; ++i) {
if(strcmp(argv[i], "-")==0)
Usestd = TRUE;
}
for(i = 1; i < argc; ++i) {
if(strcmp(argv[i], "-")!=0)
obey(argv[i]);
}
if(argc < 2) {
fprintf(stderr,"File squeezer version %s by\n\tRichard Greenlaw\n\t251 Colony Ct.\n\tGahanna, Ohio 43230\n", VERSION);
fprintf(stderr, "Usage: sq [-] pathname ...\n");
fprintf(stderr, "\t- squeezed output to stdout\n");
exit(1);
}
exit(0);
}
obey(p)
char *p;
{
char *w,*q;
char outfile[PATHLEN+2]; /* output file spec. */
/* First build output file name */
strcpy(outfile, p);
/* Find and change output file type */
for(w=q = outfile; *q != '\0'; ++q) /* skip leading /'s */
if( *q == '/')
w = q+1;
for(q = w; *q != '\0'; ++q)
if(*q == '.')
if(*(q + 1) == '\0')
*q = '\0'; /* kill trailing dot */
else
switch(*(q+2)) {
case 'q':
case 'Q':
fprintf(stderr, "\n%s ignored ( already squeezed?)", p);
return;
case '\0':
*(q+3) = '\0';
/* fall thru */
default:
*(q + 2) = 'Q';
goto named;
}
/* No file type */
strcat(outfile, ".QQQ");
named:
if(strlen(w)>14)
strcpy(outfile, ALTNAME); /* check for too long name */
squeeze(p, outfile);
}
squeeze(infile, outfile)
char *infile, *outfile;
{
register i, c;
FILE *inbuff, *outbuff; /* file buffers */
fprintf(stderr, "\n%s -> %s: ", infile, outfile);
if((inbuff=fopen(infile, "rb")) == NULL) { /* 1.7 */
fprintf(stderr, "Can't open %s for input pass 1\n", infile);
return;
}
if(Usestd)
outbuff=stdout;
else if((outbuff=fopen(outfile, "wb")) == NULL) { /* 1.7 */
fprintf(stderr, "Can't create %s\n", outfile);
fclose(inbuff);
return;
}
/* First pass - get properties of file */
crc = 0; /* initialize checksum */
fprintf(stderr, "analyzing, ");
init_ncr();
init_huff(inbuff);
rewind(inbuff);
/* Write output file header with decoding info */
wrt_head(outbuff, infile);
/* Second pass - encode the file */
fprintf(stderr, "squeezing, ");
init_ncr(); /* For second pass */
/* Translate the input file into the output file */
while((c = gethuff(inbuff)) != EOF)
if(putc(c, outbuff) == ERROR && ferror(outbuff)) {
fprintf(stderr, "ERROR - write failure in %s\n", outfile);
goto closeall;
}
fprintf(stderr, " done.\n");
closeall:
fclose(inbuff);
closeout:
fflush(outbuff);
fclose(outbuff);
}
/* First translation - encoding of repeated characters
* The code is byte for byte pass through except that
* DLE is encoded as DLE, zero and repeated byte values
* are encoded as value, DLE, count for count >= 3.
*/
init_ncr() /*initialize getcnr() */
{
state = NOHIST;
}
int
getcnr(iob)
FILE *iob;
{
switch(state) {
case NOHIST:
/* No relevant history */
state = SENTCHAR;
return lastchar = getc_crc(iob);
case SENTCHAR:
/* Lastchar is set, need lookahead */
switch(lastchar) {
case DLE:
state = NOHIST;
return 0; /* indicates DLE was the data */
case EOF:
return EOF;
default:
for(likect = 1; (newchar = getc_crc(iob)) == lastchar && likect < 255; ++likect)
;
switch(likect) {
case 1:
return lastchar = newchar;
case 2:
/* just pass through */
state = SENDNEWC;
return lastchar;
default:
state = SENDCNT;
return DLE;
}
}
case SENDNEWC:
/* Previous sequence complete, newchar set */
state = SENTCHAR;
return lastchar = newchar;
case SENDCNT:
/* Sent DLE for repeat sequence, send count */
state = SENDNEWC;
return likect;
default:
fprintf(stderr,"Bug - bad state\n");
exit(1);
/* NOTREACHED */
}
}
/******** Second translation - bytes to variable length bit strings *********/
/* This translation uses the Huffman algorithm to develop a
* binary tree representing the decoding information for
* a variable length bit string c